# Run preamble
source("code/preamble.R")

# Load data
study1_data <- readRDS("data/study1_data.rds")

# Tercile-split sample by numeracy
study1_data$num_tercile <- study1_data$numeracy %>%
    ntile(3) %>%
    factor(labels = c("Low", "Medium", "High"))

# Check for differences in means
## Reshaping data
num_mean_data <- study1_data %>%
    select(
        pid2, policy, dem_mean, rep_mean, numeracy, num_tercile,
        gender, race, latinx, edu_factor, age_factor, par_pos
    ) %>%
    pivot_longer(
        cols = matches("_mean"),
        names_pattern = "(.*)_mean",
        names_to = "party",
        values_to = "mean"
    ) %>%
    na.omit() %>%
    mutate(party = factor(party,
        levels = c("dem", "rep"),
        labels = c("Perceptions of Democrats", "Perceptions of Republicans")
    )) %>%
    mutate(pid2 = case_when(
        pid2 == "Democrat" ~ "Among Democrats",
        pid2 == "Republican" ~ "Among Republicans"
    ))

## Grab actual means
actual_means <- study1_data %>%
    select(pid2, policy, par_pos) %>%
    group_by(pid2, policy) %>%
    summarise(`Reality` = mean(par_pos)) %>%
    rename(party = pid2) %>%
    mutate(party = factor(party,
        levels = c("Democrat", "Republican"),
        labels = c("Perceptions of Democrats", "Perceptions of Republicans")
    ))

## Merge data
num_mean_data <- num_mean_data %>%
    left_join(actual_means, by = c("party", "policy")) %>%
    mutate(error = abs(mean - Reality))

# Check for differences in SDs
## Reshaping data
num_sd_data <- study1_data %>%
    select(
        pid2, policy, dem_sd, rep_sd, numeracy, num_tercile, gender, race,
        latinx, edu_factor, age_factor, par_pos
    ) %>%
    pivot_longer(
        cols = matches("_sd"),
        names_pattern = "(.*)_sd",
        names_to = "party",
        values_to = "sd"
    ) %>%
    na.omit() %>%
    mutate(party = factor(party,
        levels = c("dem", "rep"),
        labels = c("Perceptions of Democrats", "Perceptions of Republicans")
    )) %>%
    mutate(pid2 = case_when(
        pid2 == "Democrat" ~ "Among Democrats",
        pid2 == "Republican" ~ "Among Republicans"
    ))

## Grab actual sds
actual_sds <- study1_data %>%
    select(pid2, policy, par_pos) %>%
    group_by(pid2, policy) %>%
    summarise(`Reality` = sd(par_pos)) %>%
    rename(party = pid2) %>%
    mutate(party = factor(party,
        levels = c("Democrat", "Republican"),
        labels = c("Perceptions of Democrats", "Perceptions of Republicans")
    ))

## Merge data
num_sd_data <- num_sd_data %>%
    left_join(actual_sds, by = c("party", "policy")) %>%
    mutate(error = abs(sd - Reality))

# Models
## SDs
num_sd_error_model <- lm_robust(
    error ~ numeracy + policy + pid2 + gender + race + latinx +
        edu_factor + age_factor + par_pos + party,
    data = num_sd_data
)

num_sd_dem_model <- lm_robust(
    sd ~ numeracy + policy + pid2 + gender + race + latinx +
        edu_factor + age_factor + par_pos,
    data = num_sd_data %>% subset(party == "Perceptions of Democrats")
)

num_sd_rep_model <- lm_robust(
    sd ~ numeracy + policy + pid2 + gender + race + latinx +
        edu_factor + age_factor + par_pos,
    data = num_sd_data %>% subset(party == "Perceptions of Republicans")
)

## Means
num_mean_error_model <- lm_robust(
    error ~ numeracy + policy + pid2 + gender + race + latinx +
        edu_factor + age_factor + par_pos + party,
    data = num_mean_data
)

num_mean_dem_model <- lm_robust(
    mean ~ numeracy + policy + pid2 + gender + race + latinx +
        edu_factor + age_factor + par_pos,
    data = num_mean_data %>% subset(party == "Perceptions of Democrats")
)

num_mean_rep_model <- lm_robust(
    mean ~ numeracy + policy + pid2 + gender + race + latinx +
        edu_factor + age_factor + par_pos,
    data = num_mean_data %>% subset(party == "Perceptions of Republicans")
)

# Tables
## SDs
modelsummary(
    list(
        "Error in Perceived-Distribution SD" = num_sd_error_model,
        "Perceived-Distribution SD (Democrats)" = num_sd_dem_model,
        "Perceived-Distribution SD (Republicans)" = num_sd_rep_model
    ),
    stars = TRUE,
    coef_rename = c(
        "(Intercept)",
        "Numeracy",
        "Policy Issue is Border Control",
        "Policy Issue is Abortion Access",
        "Participant is Republican",
        "Participant is a Woman",
        "Participant is Another Gender",
        "Participant is Black",
        "Participant is White",
        "Participant is Multi-Racial",
        "Participant is Another Race",
        "Participant is Hispanic",
        "Participant has an Associate's Degree",
        "Participant has a Bachelor's Degree",
        "Participant has a Post-graduate Degree",
        "Participant is 26--34 years old",
        "Participant is 35--49 years old",
        "Participant is 50--64 years old",
        "Participant is 65+ years old",
        "Conservatism of Participant's Policy Attitude",
        "Perceived Distribution Represents Republicans' Attitudes"
    ),
    notes = "Note: The reference categories for factor variables are as
    follows: gun control (policy issue), Democrat (partisanship),
    man (gender), Asian (race), not Hispanic (whether Hispanic), high school
    degree or less (education), 18--25 years old (age), and Democrat (subject
    of perceived distribution). This table includes data from Study 1.",
    title = "Effect of Numeracy on Perceived-Distribution SDs",
    output = "tables/numeracy_effects_on_dist_sds.txt"
)

## Means
modelsummary(
    list(
        "Error in Perceived-Distribution Mean" = num_mean_error_model,
        "Perceived-Distribution Mean (Democrats)" = num_mean_dem_model,
        "Perceived-Distribution Mean (Republicans)" = num_mean_rep_model
    ),
    stars = TRUE,
    coef_rename = c(
        "(Intercept)",
        "Numeracy",
        "Policy Issue is Border Control",
        "Policy Issue is Abortion Access",
        "Participant is Republican",
        "Participant is a Woman",
        "Participant is Another Gender",
        "Participant is Black",
        "Participant is White",
        "Participant is Multi-Racial",
        "Participant is Another Race",
        "Participant is Hispanic",
        "Participant has an Associate's Degree",
        "Participant has a Bachelor's Degree",
        "Participant has a Post-graduate Degree",
        "Participant is 26--34 years old",
        "Participant is 35--49 years old",
        "Participant is 50--64 years old",
        "Participant is 65+ years old",
        "Conservatism of Participant's Policy Attitude",
        "Perceived Distribution Represents Republicans' Attitudes"
    ),
    notes = "Note: The reference categories for factor variables are as
    follows: gun control (policy issue), Democrat (partisanship),
    man (gender), Asian (race), not Hispanic (whether Hispanic), high school
    degree or less (education), 18--25 years old (age), and Democrat (subject
    of perceived distribution). This table includes data from Study 1.",
    title = "Effect of Numeracy on Perceived-Distribution Means",
    output = "tables/numeracy_effects_on_dist_means.txt"
)

# Print disclaimer
cat(
    "\n\n",
    " See numeracy_effects_on_dist_sds.txt for regressions of perceived-distribution SDs on numeracy (i.e., Supplementary Table 4).",
    "\n\n",
    " See numeracy_effects_on_dist_means.txt for regressions of perceived-distribution MEANs on numeracy (i.e., Supplementary Table 5)."
)
